None
## import library
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Load the ROI hierarchy lookup table (tab-separated text file).
url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"
# Drop the unused 'Level5' column, give the "modify*" columns readable
# names, and keep them ordered from the finest level (roi) to the
# coarsest (level1).
multilevel_lookup = (
    pd.read_csv(url, sep="\t")
    .drop(columns=["Level5"])
    .rename(
        columns={
            "modify": "roi",
            "modify.1": "level4",
            "modify.2": "level3",
            "modify.3": "level2",
            "modify.4": "level1",
        }
    )
    .loc[:, ["roi", "level4", "level3", "level2", "level1"]]
)
# Preview of the first rows; only displayed when run interactively
# (e.g. as the last expression of a notebook cell).
multilevel_lookup.head()
# Load the subject data and keep a single subject.
# NOTE(review): `id` shadows the Python builtin of the same name; it is
# kept because code elsewhere in the file may read it.
id = 127
subjectData = (
    pd.read_csv("https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/assetts/kirby21AllLevels.csv")
    # Rows with type 1, level 5 and the chosen subject id; only the
    # ROI name and its volume are needed downstream.
    .loc[
        lambda d: (d["type"] == 1) & (d["level"] == 5) & (d["id"] == id),
        ["roi", "volume"],
    ]
    # Attach the level1..level4 hierarchy labels to every ROI.
    .merge(multilevel_lookup, on="roi")
    # Constant column acting as the single root of the hierarchy.
    .assign(icv="ICV")
)
# Create separate two-column tables for adjacent hierarchy levels:
# icv & level1, level1 & level2, level2 & level3.
# Only columns are removed here; no rows are dropped or aggregated.
dat_icvl1 = subjectData.drop(
    columns=["roi", "volume", "level4", "level3", "level2"]
)
dat_l1l2 = subjectData.drop(
    columns=["roi", "volume", "level4", "level3", "icv"]
)
dat_l2l3 = subjectData.drop(
    columns=["roi", "volume", "level4", "level1", "icv"]
)
# Create Sankey diagram: (source column, target column) pairs, one pair
# per transition between consecutive levels.
t_list = [
    ("icv", "level1"),
    ("level1", "level2"),
    ("level2", "level3"),
]
def df_sankey(df, cols_tuple_list):
    """Build the link table and node labels for a Sankey diagram.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per observation; must contain every column named in
        ``cols_tuple_list``.
    cols_tuple_list : list of (str, str)
        ``(source_column, target_column)`` pairs. Each pair contributes
        one link per distinct (source, target) combination found in
        ``df``.

    Returns
    -------
    links : pandas.DataFrame
        Columns ``source``, ``target`` and ``value``. ``source`` and
        ``target`` are integer positions into ``labels``; ``value`` is the
        number of rows of ``df`` having that combination.
    labels : list
        Node labels, in order of first appearance (sources first, then
        targets). Equal labels coming from different columns share one
        node.

    Notes
    -----
    Rows with a missing value in either column of a pair are left out of
    that pair's counts (pandas ``groupby`` default, ``dropna=True``).
    """
    link_columns = ["source", "target", "value"]

    frames = []
    for source_col, target_col in cols_tuple_list:
        # size() counts rows per group. count() would instead count the
        # non-null cells of some *other* column, which undercounts when
        # that column has gaps and fails when no other column exists.
        sizes = df.groupby([source_col, target_col]).size()
        frames.append(
            pd.DataFrame(
                {
                    "source": sizes.index.get_level_values(0),
                    "target": sizes.index.get_level_values(1),
                    "value": sizes.to_numpy(),
                }
            )
        )

    if not frames:
        # No column pairs requested: nothing to link.
        return pd.DataFrame(columns=link_columns), []

    links = pd.concat(frames, ignore_index=True)

    # dict.fromkeys de-duplicates while keeping first-seen order, so node
    # indices are reproducible between runs (a set is not ordered).
    labels = list(dict.fromkeys([*links["source"], *links["target"]]))
    position = {label: idx for idx, label in enumerate(labels)}

    # Translate only the two endpoint columns; 'value' must never be
    # touched by the label -> index mapping.
    links["source"] = links["source"].map(position)
    links["target"] = links["target"].map(position)
    return links, labels
# Link table and node labels for the icv -> level1 -> level2 -> level3 flow.
s, labels = df_sankey(subjectData[["icv", "level1", "level2", "level3"]], t_list)

sankey_trace = go.Sankey(
    node={
        "pad": 6,
        "thickness": 20,
        "line": {"color": "black", "width": 1},
        "label": labels,
    },
    link={
        "source": s["source"].to_numpy(),
        "target": s["target"].to_numpy(),
        "value": s["value"].to_numpy(),
    },
)
fig = go.Figure(data=[sankey_trace])

# Caption each column of nodes; (x, y, text) positions are hand-placed.
for x_pos, y_pos, caption in [
    (0, 0.2, "ICV"),
    (0.34, 0.2, "Level1"),
    (0.7, 0.1, "Level2"),
    (1, 0.05, "Level3"),
]:
    fig.add_annotation(
        x=x_pos,
        y=y_pos,
        text=caption,
        showarrow=False,
        yshift=-20,
        font={"size": 12, "color": "black"},
    )

fig.update_layout(title_text="Sankey", font_size=10, width=1000, height=1000)
fig.show()
# Load the annual county-level data.
# NOTE(review): this is an absolute path inside one user's home directory;
# it will presumably not resolve on any other machine -- confirm and
# consider making it configurable.
annual = pd.read_csv('/Users/mujin/Library/CloudStorage/OneDrive-JohnsHopkins/academic file/ScM/Term 7/ds/Assignment 4/county_annual.csv')

# Scatter of count against year, one colour per buyer state.
# NOTE: this rebinds `fig`, replacing the Sankey figure created earlier.
fig = px.scatter(
    data_frame=annual,
    x="year",
    y="count",
    color="BUYER_STATE",
    labels={"BUYER_STATE": "State"},
    render_mode="svg",
)
fig.update_layout(width=1000, height=800)
fig.show()